#!/usr/bin/python3
import pymysql
import requests
from lxml import etree

# Request headers: spoof a desktop Firefox user agent so the site serves
# the regular HTML listing page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64; rv:64.0) Gecko/20100101 Firefox/64.0'
}
# Cookies copied verbatim from a logged-in browser session.
# NOTE(review): these are session/time-bound tokens and will expire — verify
# whether the site actually requires them for the listing pages.
cookies = {
    'cookie': '"108300"; bid=x6CWhyewqig; __utma=30149280.485109852.1574991282.1574991282.1574991282.1; __utmc=30149280; __utmz=30149280.1574991282.1.1.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; ap_v=0,6.0; gr_user_id=056ec422-999e-4cc8-82e1-4757d6547298; gr_session_id_22c937bbd8ebd703f2d8e9445f7dfd03=a3df6afd-13f6-4470-b577-a185c4c218e3; gr_cs1_a3df6afd-13f6-4470-b577-a185c4c218e3=user_id%3A0; gr_session_id_22c937bbd8ebd703f2d8e9445f7dfd03_a3df6afd-13f6-4470-b577-a185c4c218e3=true; _vwo_uuid_v2=DEE210CAD2A1C37C8754AAD0465FEFC45|3c7ca2da3525dbfd34f59bad5030fbde; __gads=ID=9438760ce15e89b2:T=1574991108:S=ALNI_MbPmQ5P86tmDdlvMJS_0so-WCfqzQ; __utmb=30149280.4.10.1574991282'}
# Helper below wraps the URL request / parsing for one listing page.

# NOTE(review): appears unused in this file — the loop at the bottom
# supplies its own page numbers. Confirm before removing.
page_num = 1


def getlist(page_num1):
    """Fetch one listing page from puercn.com and extract article links.

    Parameters
    ----------
    page_num1 : int | str
        1-based page number, formatted into the listing URL. (The old
        implementation used ``+`` concatenation and therefore required a
        string; the f-string accepts either.)

    Returns
    -------
    list[tuple[str, str]]
        ``(title, href)`` pairs, one per article anchor found under
        ``ul.articles > li > h3 > a``.
    """
    url = f"https://www.puercn.com/baicha/bcjg/index_{page_num1}.html"
    html = requests.get(url, cookies=cookies, headers=headers).text
    tree = etree.HTML(html)
    links = []
    for anchor in tree.xpath('.//ul[@class="articles"]/li/h3/a'):
        hrefs = anchor.xpath('./@href')
        titles = anchor.xpath('./text()')
        # Guard against malformed anchors (missing href or empty text) —
        # indexing [0] on an empty result would raise IndexError.
        if hrefs and titles:
            links.append((titles[0], hrefs[0]))
    return links


# Scrape the listing page(s) and bulk-insert (title, link) rows into MySQL.
# Open ONE connection for the whole run instead of reconnecting per page,
# and guarantee it is closed even if an insert fails.
db = pymysql.connect(host="localhost", user="root", password="root", database="puercn_com")
try:
    # Parameterized statement — values are escaped by the driver, so the
    # scraped titles/links cannot inject SQL.
    sql = "insert into news(title,link) values(%s,%s)"
    for i in range(1, 2):
        args = getlist(str(i))
        if not args:
            # Nothing scraped for this page; skip the empty executemany.
            print(i)
            continue
        # `with` closes the cursor even when executemany raises.
        with db.cursor() as cursor:
            insert = cursor.executemany(sql, args)
        print('批量插入返回受影响的行数：', insert)
        # Commit after the cursor has finished its batch.
        db.commit()
        print('sql执行成功')
        print(i)
finally:
    db.close()
